#!/usr/bin/python
from ghost import Ghost, TimeoutError, Error
import time, random
import win32com.client

# Width of the result grid: the scraped header list must have exactly this
# many entries and the scraped value list must be a multiple of it.
COLUMNS = 8
# Listing page that the script scrapes.
URL = "http://wszw.hzs.mofcom.gov.cn/fecp/fem/corp/fem_cert_stat_view_list.jsp"
# Retry budget consulted by open() when a page load times out.
MAX_TRY = 3

# Single Ghost session shared by every helper in this script.
ghost = Ghost(
	user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0",
	wait_timeout = 8,
	qt_debug = True,
	download_images = False,
)

def open(url, try_count = 0):
	"""Load *url* in the shared Ghost session, retrying when the load times out.

	NOTE: this function shadows the builtin ``open`` for the rest of the
	module; the name is kept because the script calls it.

	Args:
		url: address passed to ``ghost.open``.
		try_count: number of attempts already spent; callers normally leave
			this at 0. At most ``MAX_TRY`` attempts are made in total.

	Raises:
		AssertionError: if every attempt timed out, or if the loaded page
			does not report HTTP status 200. The checks are explicit raises
			(not ``assert`` statements) so they still run under ``python -O``.
	"""
	while True:
		try:
			page, extra_resources = ghost.open(url)
		except TimeoutError:
			# TimeoutError here is the name imported from ghost at the top
			# of the file, not the builtin.
			print("warning: timeout")
			try_count += 1
			if try_count >= MAX_TRY:
				raise AssertionError("giving up on %s after %d timed-out attempts" % (url, try_count))
			continue

		# A missing page object is treated as a failed load instead of
		# surfacing as an AttributeError.
		status = getattr(page, "http_status", None)
		if status != 200:
			raise AssertionError("unexpected HTTP status %r for %s" % (status, url))
		return

def getPageCount():
	"""Evaluate ``page_count.js`` in the Ghost session and return its result as an int.

	The script file is not part of this module; presumably it reads the
	total number of result pages from the currently loaded page.
	"""
	raw_count, _ = ghost.evaluate_js_file("page_count.js")
	return int(raw_count)

def getCurrentPage():
	"""Evaluate ``current_page.js`` in the Ghost session and return its result as an int.

	The script file is not part of this module; presumably it reads the
	number of the page currently shown in the grid.
	"""
	raw_page, _ = ghost.evaluate_js_file("current_page.js")
	return int(raw_page)

def getHeaders():
	"""Evaluate ``headers.js`` in the Ghost session and return its result unchanged.

	The caller expects a sequence with ``COLUMNS`` entries (one per grid
	column); the script file itself is not part of this module.
	"""
	result, _ = ghost.evaluate_js_file("headers.js")
	return result

def getValues():
	"""Evaluate ``values.js`` in the Ghost session and return its result unchanged.

	The caller expects a flat, non-empty sequence whose length is a
	multiple of ``COLUMNS``; the script file itself is not part of this
	module.
	"""
	result, _ = ghost.evaluate_js_file("values.js")
	return result

def setCurrentPage(current_page, reload = False, max_retries = None):
	"""Navigate the result grid to page *current_page*.

	Fills the "go to page" field and clicks the grid's button, waiting for
	the resulting page load. If Ghost raises ``Error`` at any step, the
	function prints a warning, sleeps for a random time of up to two
	seconds, re-opens ``URL`` and tries again.

	Retries run in a loop rather than by recursion, so a long run of
	failures cannot exhaust the interpreter's recursion limit.

	Args:
		current_page: page number to jump to.
		reload: when true, re-open ``URL`` before filling the field.
		max_retries: maximum number of retries after the first attempt.
			``None`` (the default) retries until the navigation succeeds.

	Raises:
		Error: the last Ghost error, once *max_retries* is exhausted.
	"""
	failures = 0
	while True:
		try:
			if reload:
				ghost.open(URL)
			ghost.set_field_value("input[name=Grid1toPageNo]", "%d" % (current_page))
			ghost.click("html body table tbody tr td form table tbody tr td div#Grid1MainLayer table tbody tr td.body input.buttonclass", expect_loading = True)
			return
		except Error:
			failures += 1
			if max_retries is not None and failures > max_retries:
				raise
			print("warning: reload")
			# Random back-off before starting over from a fresh page load.
			time.sleep(random.random() * 2)
			reload = True

# open url
open(URL)

# create excel: a visible Excel instance with a new workbook; rows are
# written to its first sheet, starting at row 1
app = win32com.client.Dispatch("Excel.Application")
app.Visible = 1
workbook = app.Workbooks.Add()
sheet = workbook.Sheets(1)
current_row = 1

# headers
headers = getHeaders()
# Explicit check instead of `assert`, so scraped data is still validated
# when Python runs with -O.
if len(headers) != COLUMNS:
	raise AssertionError("expected %d headers, got %d" % (COLUMNS, len(headers)))
print(*headers, end = ' ')
print("\n")
# Excel cell indices are 1-based, hence start = 1.
for column, header in enumerate(headers, start = 1):
	sheet.Cells(current_row, column).Value = header
current_row += 1

# Scrape every page of the grid into the sheet, then save the workbook.
# The Ghost session is shut down even if scraping fails part-way; the
# workbook is only saved after a complete run.
try:
	while True:
		# get page count
		page_count = getPageCount()
		if page_count == 0:
			raise AssertionError("page count is 0")

		# get current page
		current_page = getCurrentPage()
		print("current page: %d" % (current_page))
		if current_page < 1:
			raise AssertionError("invalid current page: %d" % (current_page))

		# get contents: a flat list holding COLUMNS values per grid row.
		# Explicit checks instead of `assert`, so they survive python -O.
		values = getValues()
		if len(values) == 0 or len(values) % COLUMNS != 0:
			raise AssertionError("expected a non-empty multiple of %d values, got %d" % (COLUMNS, len(values)))

		# echo the page to stdout, one grid row per line
		for index, value in enumerate(values):
			if index > 0 and index % COLUMNS == 0:
				print("")
			print(value, end = ' ')
		print("\n")

		# write the page to the sheet; divmod maps the flat index to a
		# (row offset, 0-based column) pair
		for index, value in enumerate(values):
			row_offset, column = divmod(index, COLUMNS)
			sheet.Cells(current_row + row_offset, column + 1).Value = value
		current_row += len(values) // COLUMNS

		# sleep for a random time of up to one second between pages
		time.sleep(random.random())

		# next page
		if current_page >= page_count:
			break
		setCurrentPage(current_page + 1)

	# NOTE(review): a relative file name is presumably resolved by Excel
	# against its own current directory, not the script's — confirm.
	workbook.SaveAs("result.xlsx")
finally:
	ghost.exit()